// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "ui/app_list/search/tokenized_string_match.h"

#include <stddef.h>

#include <cmath>

#include "base/i18n/string_search.h"
#include "base/logging.h"
#include "base/macros.h"
#include "ui/app_list/search/tokenized_string_char_iterator.h"

namespace app_list {

namespace {

    // The factors below are applied when the current char of query matches
    // the current char of the text to be matched. Different factors are chosen
    // based on where the match happens. kIsPrefixMultiplier is used when the
    // matched portion is a prefix of both the query and the text, which implies
    // that the matched chars are at the same position in query and text. This is
    // the most preferred case thus it has the highest score. When the current char
    // of the query and the text does not match, the algorithm moves to the next
    // token in the text and try to match from there. kIsFrontOfWordMultipler will
    // be used if the first char of the token matches the current char of the query.
    // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
    // used.
    // Examples:
    //   Suppose the text to be matched is 'Google Chrome'.
    //   Query 'go' would yield kIsPrefixMultiplier for each char.
    //   Query 'gc' would use kIsPrefixMultiplier for 'g' and
    //       kIsFrontOfWordMultipler for 'c'.
    //   Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
    //       kIsWeakHitMultiplier for 'h'.
    //   Query 'oo' does not match any prefix and would use the substring match
    //       fallback, thus kIsSubstringMultiplier is used for each char.
    const double kIsPrefixMultiplier = 1.0;
    const double kIsFrontOfWordMultipler = 0.8;
    const double kIsWeakHitMultiplier = 0.6;
    const double kIsSubstringMultiplier = 0.4;

    // A relevance score that represents no match.
    const double kNoMatchScore = 0.0;

    // PrefixMatcher matches the chars of a given query as prefix of tokens in
    // a given text or as prefix of the acronyms of those text tokens.
    class PrefixMatcher {
    public:
        PrefixMatcher(const TokenizedString& query,
            const TokenizedString& text)
            : query_iter_(query)
            , text_iter_(text)
            , current_match_(gfx::Range::InvalidRange())
            , current_relevance_(kNoMatchScore)
        {
        }

        // Invokes RunMatch to perform prefix match. Use |states_| as a stack to
        // perform DFS (depth first search) so that all possible matches are
        // attempted. Stops on the first full match and returns true. Otherwise,
        // returns false to indicate no match.
        bool Match()
        {
            while (!RunMatch()) {
                // No match found and no more states to try. Bail out.
                if (states_.empty()) {
                    current_relevance_ = kNoMatchScore;
                    current_hits_.clear();
                    return false;
                }

                PopState();

                // Skip restored match to try other possibilites.
                AdvanceToNextTextToken();
            }

            if (current_match_.IsValid())
                current_hits_.push_back(current_match_);

            return true;
        }

        double relevance() const { return current_relevance_; }
        const TokenizedStringMatch::Hits& hits() const { return current_hits_; }

    private:
        // Context record of a match.
        struct State {
            State()
                : relevance(kNoMatchScore)
            {
            }
            State(double relevance,
                const gfx::Range& current_match,
                const TokenizedStringMatch::Hits& hits,
                const TokenizedStringCharIterator& query_iter,
                const TokenizedStringCharIterator& text_iter)
                : relevance(relevance)
                , current_match(current_match)
                , hits(hits.begin(), hits.end())
                , query_iter_state(query_iter.GetState())
                , text_iter_state(text_iter.GetState())
            {
            }

            // The current score of the processed query chars.
            double relevance;

            // Current matching range.
            gfx::Range current_match;

            // Completed matching ranges of the processed query chars.
            TokenizedStringMatch::Hits hits;

            // States of the processed query and text chars.
            TokenizedStringCharIterator::State query_iter_state;
            TokenizedStringCharIterator::State text_iter_state;
        };
        typedef std::vector<State> States;

        // Match chars from the query and text one by one. For each matching char,
        // calculate relevance and matching ranges. And the current stats is
        // recorded so that the match could be skipped later to try other
        // possiblities. Repeat until any of the iterators run out. Return true if
        // query iterator runs out, i.e. all chars in query are matched.
        bool RunMatch()
        {
            while (!query_iter_.end() && !text_iter_.end()) {
                if (query_iter_.Get() == text_iter_.Get()) {
                    PushState();

                    if (query_iter_.GetArrayPos() == text_iter_.GetArrayPos())
                        current_relevance_ += kIsPrefixMultiplier;
                    else if (text_iter_.IsFirstCharOfToken())
                        current_relevance_ += kIsFrontOfWordMultipler;
                    else
                        current_relevance_ += kIsWeakHitMultiplier;

                    if (!current_match_.IsValid())
                        current_match_.set_start(text_iter_.GetArrayPos());
                    current_match_.set_end(text_iter_.GetArrayPos() + text_iter_.GetCharSize());

                    query_iter_.NextChar();
                    text_iter_.NextChar();
                } else {
                    AdvanceToNextTextToken();
                }
            }

            return query_iter_.end();
        }

        // Skip to the next text token and close current match. Invoked when a
        // mismatch happens or to skip a restored match.
        void AdvanceToNextTextToken()
        {
            if (current_match_.IsValid()) {
                current_hits_.push_back(current_match_);
                current_match_ = gfx::Range::InvalidRange();
            }

            text_iter_.NextToken();
        }

        void PushState()
        {
            states_.push_back(State(current_relevance_,
                current_match_,
                current_hits_,
                query_iter_,
                text_iter_));
        }

        void PopState()
        {
            DCHECK(!states_.empty());

            State& last_match = states_.back();
            current_relevance_ = last_match.relevance;
            current_match_ = last_match.current_match;
            current_hits_.swap(last_match.hits);
            query_iter_.SetState(last_match.query_iter_state);
            text_iter_.SetState(last_match.text_iter_state);

            states_.pop_back();
        }

        TokenizedStringCharIterator query_iter_;
        TokenizedStringCharIterator text_iter_;

        States states_;
        gfx::Range current_match_;

        double current_relevance_;
        TokenizedStringMatch::Hits current_hits_;

        DISALLOW_COPY_AND_ASSIGN(PrefixMatcher);
    };

} // namespace

TokenizedStringMatch::TokenizedStringMatch()
    : relevance_(kNoMatchScore)
{
}

TokenizedStringMatch::~TokenizedStringMatch() { }

bool TokenizedStringMatch::Calculate(const TokenizedString& query,
    const TokenizedString& text)
{
    relevance_ = kNoMatchScore;
    hits_.clear();

    PrefixMatcher matcher(query, text);
    if (matcher.Match()) {
        relevance_ = matcher.relevance();
        hits_.assign(matcher.hits().begin(), matcher.hits().end());
    }

    // Substring match as a fallback.
    if (relevance_ == kNoMatchScore) {
        size_t substr_match_start = 0;
        size_t substr_match_length = 0;
        if (base::i18n::StringSearchIgnoringCaseAndAccents(query.text(),
                text.text(),
                &substr_match_start,
                &substr_match_length)) {
            relevance_ = kIsSubstringMultiplier * substr_match_length;
            hits_.push_back(gfx::Range(substr_match_start,
                substr_match_start + substr_match_length));
        }
    }

    // Temper the relevance score with an exponential curve. Each point of
    // relevance (roughly, each keystroke) is worth less than the last. This means
    // that typing a few characters of a word is enough to promote matches very
    // high, with any subsequent characters being worth comparatively less.
    // TODO(mgiuca): This doesn't really play well with Omnibox results, since as
    // you type more characters, the app/omnibox results tend to jump over each
    // other.
    relevance_ = 1.0 - std::pow(0.5, relevance_);

    return relevance_ > kNoMatchScore;
}

bool TokenizedStringMatch::Calculate(const base::string16& query,
    const base::string16& text)
{
    const TokenizedString tokenized_query(query);
    const TokenizedString tokenized_text(text);
    return Calculate(tokenized_query, tokenized_text);
}

} // namespace app_list
